# -*- coding:utf-8 -*- 
''' 
import codecs
import sys
from lxml import etree

tree = etree.HTML(open('http://news.baidu.com/','r').read())

nodes = tree.xpath("//div[@id='name']")
print(nodes[0]).text
'''
'''
import lxml.etree as etree
from urllib import urlopen    
dom = urlopen("http://news.baidu.com").read().decode('utf-8')

print len(dom)
print dom[0].tag
'''


import lxml.etree as etree

# Parse a small, well-formed document and walk through the lxml.etree element
# API. print(...) is always called with a single argument so the script
# produces the same output layout under Python 2 and Python 3.
html = '<html><body id="1">abc<div>123</div>def<div>456<p>789</p><p id="2">ghi</p></div>jkl</body><javascript></javascript></html>'
dom = etree.fromstring(html)
doc = etree.tostring(dom)
print(doc)

# len() of an element is the number of its direct children.
print(len(dom))

print(dom[0].tag)
for child in dom:
    print(child.tag)

body = dom[0]
print(dom.index(body))

print(body.getparent().tag)

print(body.getnext().tag)

print('stop')

# list(element) is the supported replacement for the deprecated getchildren().
print(list(body))

print(body.getprevious())

# iterancestors()/iterdescendants()/itersiblings() return lazy iterators;
# printing them directly only shows the iterator object, so collect the tags.
print([element.tag for element in body.iterancestors()])

print([element.tag for element in body.iterdescendants()])

print([element.tag for element in body.itersiblings()])

for ele in dom.iter():
    print(ele.tag)

print(body.get('id'))     # attribute lookup by name

print(body.attrib)        # all attributes as a dict-like mapping

print(body.text)          # text before the first child element

print(body.tail)          # text following the element's closing tag

print(body.xpath('text()'))

# drop_tree() is provided by lxml.html elements, not by plain lxml.etree
# elements, so a bare attribute access would raise AttributeError here.
# Look it up defensively; this prints None for an etree element.
print(getattr(body, 'drop_tree', None))

# Build a tiny tree from scratch, then remove the child and clear the root,
# printing the serialized root after each mutation.
root = etree.Element('body')
print(root)
child1 = etree.SubElement(root, 'div')
# Use the element just created: remove() raises ValueError when handed an
# element that is not a direct child of root.
print(child1)
root.remove(child1)
print(etree.tostring(root))
root.clear()
print(etree.tostring(root))


